Morphomics Report

devtools::load_all("D:/source/SpatialPathomicsToolkit")

library(knitr)
library(tidyverse)

Load Data

dataDir="d:\\workSync\\HaichunYang\\202306_podocyteMorphomics\\202310_data\\"
setwd(dataDir)

files=list.files(pattern="*.csv")
samples=gsub("_.*","",files)
samples=gsub("6793-","",samples)
metaTable=data.frame(Sample=samples,File=files,stringsAsFactors=FALSE)

#celltypes=sapply(strsplit(files,"_"),function(x) x[2])
#metaTable=data.frame(Sample=samples,File=files,CellType=celltypes,stringsAsFactors=FALSE)

featureDataAll=ReadMorphomicFeatures(metaTable)

#add celltype column
featureDataAll[["SampleData"]]$CellType=sapply(strsplit(featureDataAll[["SampleData"]]$FileName,"_"),function(x) x[1])

Data Transformation

#temp=DataSelection(featureDataAll,featureType=c("AreaShape","Intensity"))
featureDataAllNormlized=MorphomicFeaturesNormlization(featureDataAll)
## [1] "Remove variables less than 10 unique values"
## [1] "23 features removed"
## [1] "Number of features after selection:"
## 
##          AreaShape        Granularity          Intensity          Neighbors RadialDistribution            Texture 
##                104                  3                 15                  6                 67                 52 
## [1] "Remove variables with NA"
## [1] "3 features removed"
## [1] "AngleBetweenNeighbors_Expanded;FirstClosestDistance_Expanded;SecondClosestDistance_Expanded"
## [1] "Number of features after selection:"
## 
##          AreaShape        Granularity          Intensity          Neighbors RadialDistribution            Texture 
##                104                  3                 15                  3                 67                 52 
## [1] "Remove variables less than 2% subjects with unique values"
## [1] "Normalization done"

Data Describe

library(Hmisc)
## Warning: package 'Hmisc' was built under R version 4.2.3

Raw

varForTable1=colnames(featureDataAllNormlized[["FeatureData"]])
s=describe(featureDataAllNormlized[["FeatureData"]][,varForTable1])
html(s, exclude1=FALSE,  what=c('%'),digits=3, prmsd=TRUE)

Transformation summary

print(table(featureDataAllNormlized[["FeatureDataNormlizedSummary"]]$note))
## 
##                          BoxCox (1)                    Boxcox trans.(2)                          Log trans.                            Raw data Raw due to fail in Boxcox trans.(2) 
##                                  13                                  16                                  92                                 121                                   2

After data Transformation

varForTable1=colnames(featureDataAllNormlized[["FeatureDataNormlized"]])
s=describe(featureDataAllNormlized[["FeatureDataNormlized"]][,varForTable1])
html(s, exclude1=FALSE,  what=c('%'),digits=3, prmsd=TRUE)

Perform PCA and Correlation

featureDataAllNormlized=featureDataPcaAll(featureDataAllNormlized)

PCA: Propotion of Variance Explained

By Feature Type

pList=list()
for (reductionName in c("AreaShape","Granularity"  ,"Intensity","Neighbors", "RadialDistribution" ,"Texture"  )){
  pList[[reductionName]]=plotPcaBarplot(featureDataAllNormlized,reductionName=paste0(reductionName,"_PCA"))+ggtitle(reductionName)
}
## Warning: package 'viridis' was built under R version 4.2.3
## Warning: package 'viridisLite' was built under R version 4.2.3
#show plots in pList by patchwork package
patchwork::wrap_plots(pList,ncol=2)

#patchwork::wrap_plots(pList,ncol=2,guides='collect')&scale_fill_gradientn(colors = color_palette,limits=c(0,30))

By Cell Type

pList=list()
for (reductionName in c("mesangial","PEC", "podocyte" ,"ALL"  )){
  pList[[reductionName]]=plotPcaBarplot(featureDataAllNormlized,reductionName=paste0(reductionName,"_PCA"))+ggtitle(reductionName)
}
patchwork::wrap_plots(pList,ncol=2)

PCA: Correlation of PCs in different cell

By Feature Type

# names(featureDataAllNormlized[["Reductions"]])
# 
# reductionName="Texture"
# temp1=featureDataAllNormlized[["Reductions"]][[reductionName]]$cell.embeddings[,3]
# 
# reductionName="podocyte_Texture"
# temp2=featureDataAllNormlized[["Reductions"]][[reductionName]]$cell.embeddings[,3]

plotPCsByCellType(featureDataAllNormlized,reductionName="AreaShape_PCA")

plotPCsByCellType(featureDataAllNormlized,reductionName="Granularity_PCA")

plotPCsByCellType(featureDataAllNormlized,reductionName="Intensity_PCA")

plotPCsByCellType(featureDataAllNormlized,reductionName="Neighbors_PCA")

plotPCsByCellType(featureDataAllNormlized,reductionName="RadialDistribution_PCA")

plotPCsByCellType(featureDataAllNormlized,reductionName="Texture_PCA")

All Features together

plotPCsByCellType(featureDataAllNormlized,reductionName="ALL_PCA",reductionNameToComp=c("mesangial_PCA","PEC_PCA" ,"podocyte_PCA"),PCs=1:9)

# plotPCsByCellType(featureDataAllNormlized,reductionName="ALL",reductionNameToComp=c("AreaShape","Granularity" ,"Intensity"),PCs=1:4)

PCA: Loading of PCs in different cell

plotPCsLoadingByCellType(featureDataAllNormlized,reductionName="AreaShape_PCA")

plotPCsLoadingByCellType(featureDataAllNormlized,reductionName="Granularity_PCA")

plotPCsLoadingByCellType(featureDataAllNormlized,reductionName="Intensity_PCA")

plotPCsLoadingByCellType(featureDataAllNormlized,reductionName="Neighbors_PCA")

plotPCsLoadingByCellType(featureDataAllNormlized,reductionName="RadialDistribution_PCA")

plotPCsLoadingByCellType(featureDataAllNormlized,reductionName="Texture_PCA")

All Features together

plotPCsLoadingByCellType(featureDataAllNormlized,reductionName="ALL_PCA",reductionNameToComp=c("mesangial_PCA","PEC_PCA" ,"podocyte_PCA"),PCs=1:9)

Feature Correlation Heatmap

plotCorHeatmap(featureDataAllNormlized,PCs=1:4)

plotCorHeatmap(featureDataAllNormlized,reductionName="podocyte_PCA",PCs=1:4)

plotCorHeatmap(featureDataAllNormlized,reductionName="Intensity_PCA",PCs=1:4)

plotCorHeatmap(featureDataAllNormlized,reductionName="podocyte_Intensity_PCA",PCs=1:4)

sparsePCA analysis

Perform sparsePCA based on PCA results

#another package
#devtools::install_github("erichson/spca")
library(sparsepca)
## Warning: package 'sparsepca' was built under R version 4.2.3
temp=featureDataAllNormlized
resultSparsePCA=featureDataSparsePca(temp,dataName="FeatureDataNormlized",k=30,alpha = 1e-03)
featureDataAllNormlized[["Reductions"]]$ALL_SparsePCA=resultSparsePCA


featureType="AreaShape"
temp=DataSelection(featureDataAllNormlized,featureType=featureType)
## [1] "AreaShape were selected as featureType"
## [1] "Number of features after selection:"
## 
## AreaShape 
##       104
resultSparsePCA=featureDataSparsePca(temp,dataName="FeatureDataNormlized",k=10,alpha = 1e-03)
featureDataAllNormlized[["Reductions"]]$AreaShape_SparsePCA=resultSparsePCA

featureType="Intensity"
temp=DataSelection(featureDataAllNormlized,featureType=featureType)
## [1] "Intensity were selected as featureType"
## [1] "Number of features after selection:"
## 
## Intensity 
##        15
resultSparsePCA=featureDataSparsePca(temp,dataName="FeatureDataNormlized",k=4,alpha = 1e-03)
featureDataAllNormlized[["Reductions"]]$Intensity_SparsePCA=resultSparsePCA

featureType="Neighbors"
temp=DataSelection(featureDataAllNormlized,featureType=featureType)
## [1] "Neighbors were selected as featureType"
## [1] "Number of features after selection:"
## 
## Neighbors 
##         3
resultSparsePCA=featureDataSparsePca(temp,dataName="FeatureDataNormlized",k=4,alpha = 1e-03)
featureDataAllNormlized[["Reductions"]]$Neighbors_SparsePCA=resultSparsePCA

featureType="RadialDistribution"
temp=DataSelection(featureDataAllNormlized,featureType=featureType)
## [1] "RadialDistribution were selected as featureType"
## [1] "Number of features after selection:"
## 
## RadialDistribution 
##                 67
resultSparsePCA=featureDataSparsePca(temp,dataName="FeatureDataNormlized",k=10,alpha = 1e-03)
featureDataAllNormlized[["Reductions"]]$RadialDistribution_SparsePCA=resultSparsePCA

featureType="Texture"
temp=DataSelection(featureDataAllNormlized,featureType=featureType)
## [1] "Texture were selected as featureType"
## [1] "Number of features after selection:"
## 
## Texture 
##      52
resultSparsePCA=featureDataSparsePca(temp,dataName="FeatureDataNormlized",k=4,alpha = 1e-03)
featureDataAllNormlized[["Reductions"]]$Texture_SparsePCA=resultSparsePCA

featureType="Granularity"
temp=DataSelection(featureDataAllNormlized,featureType=featureType)
## [1] "Granularity were selected as featureType"
## [1] "Number of features after selection:"
## 
## Granularity 
##           3
resultSparsePCA=featureDataSparsePca(temp,dataName="FeatureDataNormlized",k=4,alpha = 1e-03)
featureDataAllNormlized[["Reductions"]]$Granularity_SparsePCA=resultSparsePCA


# 
# dataForsPCA=temp[["FeatureDataNormlized"]]
# set.seed(123)
# rspca.results <- rspca((dataForsPCA), k=10, verbose=TRUE, max_iter=1000,center=TRUE, scale=TRUE)
# row.names(rspca.results$loadings)=colnames(dataForsPCA)
# head(rspca.results$loadings)
# 
# summary(rspca.results)
# 
# 
# featureType="Intensity"
# temp=DataSelection(featureDataAllNormlized,featureType=featureType)
# dataForsPCA=temp[["FeatureDataNormlized"]]
# 
# rspca.results <- rspca((dataForsPCA), k=5, verbose=TRUE, max_iter=1000,center=TRUE, scale=TRUE,alpha = 1e-03)
# row.names(rspca.results$loadings)=colnames(dataForsPCA)
# colnames(rspca.results$loadings)=paste0("PC",1:ncol(rspca.results$loadings))
# 
# head(rspca.results$loadings)
# #how many features for each PC
# apply(rspca.results$loadings,2,function(x) length(which(x!=0))/length(x))
# #Existing in how many PCs for each feature
# table(apply(rspca.results$loadings,1,function(x) length(which(x!=0))))
# 
# summary(rspca.results)

# temp=rspca.results$loadings
# temp[temp==0]=NA
# ComplexHeatmap::Heatmap(temp,cluster_columns = FALSE,cluster_rows =  FALSE)

sparsePCA loading Vis

for (reduction in c("ALL_SparsePCA","AreaShape_SparsePCA","RadialDistribution_SparsePCA")) {
  plot(plotPCsLoading(featureDataAllNormlized[["Reductions"]][[reduction]]$feature.loadings[,1:9])+ggtitle(reduction))
}

for (reduction in c("Intensity_SparsePCA","Neighbors_SparsePCA","Texture_SparsePCA","Granularity_SparsePCA")) {
  plot(plotPCsLoading(featureDataAllNormlized[["Reductions"]][[reduction]]$feature.loadings[,1:3])+ggtitle(reduction))
}

# plotPCsLoading(featureDataAllNormlized[["Reductions"]]$AreaShape_SparsePCA$feature.loadings[,1:9])
# plotPCsLoading(featureDataAllNormlized[["Reductions"]]$RadialDistribution_SparsePCA$feature.loadings[,1:9])
# plotPCsLoading(featureDataAllNormlized[["Reductions"]]$Intensity_SparsePCA$feature.loadings[,1:4])
# plotPCsLoading(featureDataAllNormlized[["Reductions"]]$Neighbors_SparsePCA$feature.loadings[,1:4])
# plotPCsLoading(featureDataAllNormlized[["Reductions"]]$Texture_SparsePCA$feature.loadings[,1:4])
# plotPCsLoading(featureDataAllNormlized[["Reductions"]]$Granularity_SparsePCA$feature.loadings[,1:4])

Differential detection

Diff By cell type in all samples

testResult=testFeatureDiff(featureDataAllNormlized,dataName="AreaShape_PCA",groupColumn="CellType")
knitr::kable(head(testResult))
pValue mesangial PEC podocyte pAdj
PC1 0.0000002 -1.0709680 1.3836765 -0.2211795 0.0000079
PC2 0.0000000 -1.7483061 1.2918140 0.7335462 0.0000000
PC3 0.3333083 0.1845570 -0.2341403 0.0332326 0.4812937
PC4 0.0015975 0.0359318 -0.3347513 0.3337718 0.0151033
PC5 0.1260320 -0.0398208 0.1939320 -0.1695555 0.2912740
PC6 0.1461189 0.0157159 0.1556479 -0.1960573 0.3233268
plotFeatureByGroup(featureDataAllNormlized,dataName="AreaShape_PCA",featureNames=c("PC1","PC2","PC4","PC5"),groupColumn="CellType")

Diff By Sample in one cell type

temp=DataSelection(featureDataAllNormlized,CellType="mesangial")

[1] “mesangial were selected as CellType” [1] “Number of Sample/CellType after selection:”

    mesangial

AF-10 93 AF-7 81 AF-8 52 AF-9 88

testResult=testFeatureDiff(temp,dataName="AreaShape_PCA",groupColumn="Sample")
knitr::kable(head(testResult))
pValue AF-10 AF-7 AF-8 AF-9 pAdj
PC1 0.0232520 -1.0038110 -1.5160756 0.5604785 -1.6962761 0.2969516
PC2 0.0140227 -1.2655242 -2.4173712 -1.5701714 -1.7479360 0.2916714
PC3 0.7268383 0.0880016 0.2199501 0.4413957 0.1022524 0.8688641
PC4 0.3827271 -0.1403118 -0.1022275 0.2612373 0.2162236 0.7196790
PC5 0.2570135 0.0164711 0.1119739 0.0826852 -0.3114212 0.6591459
PC6 0.0172450 -0.3151867 0.0401784 0.4218819 0.1028959 0.2934276
plotFeatureByGroup(temp,dataName="AreaShape_PCA",featureNames=c("PC2","PC5"),groupColumn="Sample")

#extract cells with largest PC2 and smallest PC2
cellEmbeddingsTable=temp[["Reductions"]][["AreaShape_PCA"]]$cell.embeddings[row.names(temp[["SampleData"]]),]

temp1=cellEmbeddingsTable[head(order(cellEmbeddingsTable[,"PC2"])),][,1:3]
temp2=cellEmbeddingsTable[tail(order(cellEmbeddingsTable[,"PC2"])),][,1:3]

Diff By Group in each cell type

featureDataAllNormlized[["SampleData"]]$Group=ifelse(featureDataAllNormlized[["SampleData"]]$Sample %in% c("AF-7","AF-8"),"Sample78","Sample910")

allCellTypes=unique(featureDataAllNormlized[["SampleData"]]$CellType)
testResultList=list()
for (cellType in allCellTypes) {
  temp=DataSelection(featureDataAllNormlized,CellType=cellType)
  testResult=testFeatureDiff(temp,dataName="AreaShape_PCA",groupColumn="Group")
  #knitr::kable(head(testResult))
  testResultList[[cellType]]=testResult
}

[1] “PEC were selected as CellType” [1] “Number of Sample/CellType after selection:”

    PEC

AF-10 85 AF-7 68 AF-8 50 AF-9 80 [1] “mesangial were selected as CellType” [1] “Number of Sample/CellType after selection:”

    mesangial

AF-10 93 AF-7 81 AF-8 52 AF-9 88 [1] “podocyte were selected as CellType” [1] “Number of Sample/CellType after selection:”

    podocyte

AF-10 84 AF-7 59 AF-8 31 AF-9 76

for (cellType in names(testResultList)) {
  print(knitr::kable(head(testResultList[[cellType]]),caption = cellType))
}
PEC
pValue Sample78 Sample910 pAdj
PC1 0.1647764 1.9417594 0.9845627 0.9019341
PC2 0.0245740 2.0304581 0.7635715 0.8518979
PC3 0.3595083 0.0405176 -0.4305623 0.9740970
PC4 0.8446652 -0.3158944 -0.3482368 0.9847101
PC5 0.6300036 0.2955662 0.1212481 0.9811349
PC6 0.2848396 0.3101746 0.0451378 0.9740970
mesangial
pValue Sample78 Sample910 pAdj
PC1 0.2174119 -0.7041898 -1.3404791 0.6824032
PC2 0.0446062 -2.0861352 -1.5000670 0.4079057
PC3 0.3620851 0.3065303 0.0949302 0.7515257
PC4 0.8125580 0.0398790 0.0330314 0.9707279
PC5 0.1358766 0.1005227 -0.1429462 0.5887988
PC6 0.0264400 0.1894158 -0.1119200 0.4079057
podocyte
pValue Sample78 Sample910 pAdj
PC1 0.0536558 0.7163934 -0.7485642 0.6846849
PC2 0.0454283 1.2657749 0.4341676 0.6846849
PC3 0.6227424 0.1249957 -0.0183841 0.9675981
PC4 0.0550258 -0.0586012 0.5544816 0.6846849
PC5 0.8740448 -0.0615449 -0.2303114 0.9825623
PC6 0.5909086 -0.3399449 -0.1151205 0.9675981
temp=DataSelection(featureDataAllNormlized,CellType="mesangial")

[1] “mesangial were selected as CellType” [1] “Number of Sample/CellType after selection:”

    mesangial

AF-10 93 AF-7 81 AF-8 52 AF-9 88

plotFeatureByGroup(temp,dataName="AreaShape_PCA",featureNames=c("PC2"),groupColumn="Group")

temp=DataSelection(featureDataAllNormlized,CellType="PEC")

[1] “PEC were selected as CellType” [1] “Number of Sample/CellType after selection:”

    PEC

AF-10 85 AF-7 68 AF-8 50 AF-9 80

plotFeatureByGroup(temp,dataName="AreaShape_PCA",featureNames=c("PC2","PC6"),groupColumn="Group")

temp=DataSelection(featureDataAllNormlized,CellType="podocyte")

[1] “podocyte were selected as CellType” [1] “Number of Sample/CellType after selection:”

    podocyte

AF-10 84 AF-7 59 AF-8 31 AF-9 76

plotFeatureByGroup(temp,dataName="AreaShape_PCA",featureNames=c("PC1","PC2","PC4"),groupColumn="Group")

#PC2 is more related
#extract cells with largest PC2 and smallest PC2
cellEmbeddingsTable=featureDataAllNormlized[["Reductions"]][["AreaShape_PCA"]]$cell.embeddings
temp1=cellEmbeddingsTable[head(order(cellEmbeddingsTable[,"PC2"])),][,1:3]
temp2=cellEmbeddingsTable[tail(order(cellEmbeddingsTable[,"PC2"])),][,1:3]

#dput(row.names(temp1))
#dput(row.names(temp2))